Construct data frame
load("XSTSF_production.RData")
source('functions.R')
# Attach the manually annotated sandhi labels.
# Empty cells in the CSV are read in as NA.
# f0_all_pre must already exist at this point; it is not created in this part
# of the script.
label_sandhi <- read.csv("raw_data/sandhi_label.csv", na.strings = "")

f0_all_pre_label <- f0_all_pre %>%
  # drop the existing sandhi_tone column so the joined manual label takes
  # its place
  select(-sandhi_tone) %>%
  left_join(
    label_sandhi[, c("ind_no", "sandhi_tone", "sandhi_tone_var", "diortri")],
    by = c("diortri", "ind_no")
  ) %>%
  # where no variant label is given, fall back to the main sandhi label
  mutate(sandhi_tone_var = coalesce(sandhi_tone_var, sandhi_tone)) %>%
  rename(normtime = time)
# Disyllabic items only.
f0_di <- f0_all_pre_label %>%
  filter(diortri == "di")

# Citation data (focus_condition == "ct"), re-normalised within each speaker:
#   f0ref   - the speaker's mean f0, ignoring NAs
#   norm_f0 - z-score of log(f0) computed within speaker
f0_di_ct <- f0_di %>%
  filter(focus_condition == "ct") %>%
  group_by(speaker) %>%
  mutate(
    f0ref = mean(f0, na.rm = TRUE),
    # scale() returns a one-column matrix carrying "scaled:*" attributes;
    # as.numeric() keeps norm_f0 an ordinary numeric column
    norm_f0 = as.numeric(scale(log(f0)))
  ) %>%
  ungroup()

# Split the citation data by the first letter of mono_tone_1:
# "H" versus "L" or "R".
f0_di_ct_h <- f0_di_ct %>%
  filter(grepl("^H", mono_tone_1))
f0_di_ct_l <- f0_di_ct %>%
  filter(grepl("^[LR]", mono_tone_1))
# Print the definition of draw_by() for reference.
# NOTE(review): draw_by is not defined in this part of the script; presumably
# it comes from functions.R or the loaded .RData file -- confirm.
# As the printed source below shows, draw_by(dataframe, x, y) draws norm_f0
# against normtime as lines grouped by syllable_no x ind_no, coloured and
# line-typed by citation_no. It facets by x alone (facet_wrap, 2 columns) when
# y is missing, otherwise by y ~ x (facet_grid), and fixes the y-axis to
# [-4, 4].
draw_by
## function (dataframe, x, y)
## {
## p <- dataframe %>% ggplot(aes(x = normtime, y = norm_f0,
## group = interaction(syllable_no, ind_no), color = citation_no,
## linetype = citation_no, text = paste("speaker: ", speaker,
## "\ncitation tone: ", citation_tone, "\ncitation no:",
## citation_no, "\ntoken: ", token))) + geom_line() +
## {
## if (missing(y)) {
## facet_wrap(as.formula(paste("~", x)), ncol = 2,
## labeller = label_both)
## }
## else {
## facet_grid(as.formula(paste(y, "~", x)), labeller = label_value)
## }
## } + theme_bw() + theme(panel.spacing.y = unit(0.02, "cm",
## data = NULL), text = element_text(size = 10)) + ylim(-4,
## 4)
## p
## }
# Show the current working directory; the script reads its inputs through
# relative paths, so they resolve against this directory.
getwd()
## [1] "/Users/shiyibing/Desktop/PhD/XSTSF"
# Interactive per-speaker plots for four subsets of the disyllabic citation
# data. Each subset keeps rows whose syntax_iniTone starts with "L" or "M" and
# ends in "p" or "s", taken from either f0_di_ct_h or f0_di_ct_l.

# from f0_di_ct_h, syntax_iniTone ending in "p"
f0_di_lcmh_hp <- filter(f0_di_ct_h, grepl("^[LM].*p$", syntax_iniTone))
f0_di_lcmh_hp %>%
  draw_by("speaker") %>%
  ggplotly(tooltip = c("text", "x"))

# from f0_di_ct_h, syntax_iniTone ending in "s"
f0_di_lcmh_hs <- filter(f0_di_ct_h, grepl("^[LM].*s$", syntax_iniTone))
f0_di_lcmh_hs %>%
  draw_by("speaker") %>%
  ggplotly(tooltip = c("text", "x"))

# from f0_di_ct_l, syntax_iniTone ending in "p"
f0_di_lcmh_lp <- filter(f0_di_ct_l, grepl("^[LM].*p$", syntax_iniTone))
f0_di_lcmh_lp %>%
  draw_by("speaker") %>%
  ggplotly(tooltip = c("text", "x"))

# from f0_di_ct_l, syntax_iniTone ending in "s"
f0_di_lcmh_ls <- filter(f0_di_ct_l, grepl("^[LM].*s$", syntax_iniTone))
f0_di_lcmh_ls %>%
  draw_by("speaker") %>%
  ggplotly(tooltip = c("text", "x"))
# Pool the two subsets taken from f0_di_ct_h and prepare them for the cluster
# plots.
# NOTE(review): only f0_di_lcmh_hp and f0_di_lcmh_hs are combined here; the
# subsets f0_di_lcmh_lp and f0_di_lcmh_ls are not -- confirm this is intended.
f0_di_lcmh <- bind_rows(f0_di_lcmh_hp, f0_di_lcmh_hs) %>%
  # keep only rows that carry a sandhi label
  filter(!is.na(sandhi_tone)) %>%
  mutate(
    # relabel "HLLM" as "HMML"
    sandhi_tone = ifelse(sandhi_tone == "HLLM", "HMML", sandhi_tone),
    # normtime expressed as a proportion (normtime / 20)
    propdur = as.integer(normtime) / 20
  ) %>%
  # groupvar = "<ind_no>_<syllable_no>"; both source columns are kept
  unite("groupvar", ind_no, syllable_no, sep = "_", remove = FALSE)
unique(f0_di_lcmh$sandhi_tone) # check the labels
## [1] "HMML" "MHHL" "MMMH" "HHHH"
# Cluster plot by sandhi label, first overall and then with "speaker" as the
# third argument.
p_cluster(f0_di_lcmh, sandhi_tone)
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
p_cluster(f0_di_lcmh, sandhi_tone, "speaker")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
Try k-means clustering on the whole disyllabic citation dataset.
# Reshape the disyllabic citation data (syntax_iniTone starting with "L" or
# "M") to wide format, one column per normtime value, for longitudinal k-means
# clustering with kml.
f0_di_lcmh <- f0_di_ct %>%
  filter(grepl("^[LM]", syntax_iniTone)) %>%
  select(-diortri, -syllable_no, -focus_no, -f0) %>%
  # scale() can leave norm_f0 as a one-column matrix; flatten it so every
  # widened column is a plain numeric vector (no-op if it is already numeric)
  mutate(norm_f0 = as.numeric(norm_f0)) %>%
  # names_sort = TRUE orders the new columns by normtime, as spread() did, so
  # that start:end below is a contiguous, ordered run of time points
  pivot_wider(
    names_from = normtime,
    values_from = norm_f0,
    names_sort = TRUE
  )

# Column positions of the first ("1") and last ("20") time point.
start <- which(names(f0_di_lcmh) == "1")
end <- which(names(f0_di_lcmh) == "20")
stopifnot(length(start) == 1L, length(end) == 1L, start < end)

# timeInData selects the trajectory columns by position. time must hold the
# measurement times themselves (the normtime values), not the column
# positions, otherwise the time axis is shifted by the number of id columns.
f0_di_lcmh_cluster <- cld(
  f0_di_lcmh,
  idAll = f0_di_lcmh$ind_no,
  timeInData = start:end,
  time = as.numeric(names(f0_di_lcmh)[start:end])
)

# kml() starts from random initial partitions; fix the seed (arbitrary value)
# so that the solution plotted below is reproducible across runs.
set.seed(1)
kml(f0_di_lcmh_cluster, nbClusters = 2:10)
## ~ Fast KmL ~
## ***************************************************************************************************S
## 100 ********************************************************************************S
# Plot the 7-cluster solution, colouring trajectories by cluster.
plot(f0_di_lcmh_cluster, 7, parTraj = parTRAJ(col = "clusters"))